'''
Created on 03.11.2013

@author: Liesa

Read a text file, then write each word type and its number of tokens to a new file (and print them).
'''
import doctest
import os
from collections import Counter

import trunk.data

# Word -> token count of the most recent analyse() call. Kept at module level
# so existing code that reads ``words`` after a call keeps working.
words = dict()


def analyse(input, output):
    """
    Count how often each word occurs in a text file and report the counts.

    Every line of ``input`` is lower-cased and split on whitespace; each
    resulting token counts as one occurrence of that word. The words are
    written in sorted order to ``output`` as tab-separated "word, count"
    lines, and the same lines are printed to standard output.

    Counting starts from scratch on every call, so repeated calls do not
    accumulate. The module-level ``words`` dict is updated in place to hold
    the counts of the latest call.

    :param input: path of the text file to read
    :param output: path of the file to write; it is opened in "w" mode, so
        existing content is replaced
    :return: None

    The printed lines contain tab characters. doctest expands hard tabs in
    the expected text but not in the actual output, so the example needs
    NORMALIZE_WHITESPACE to match.

    >>> data_dir = trunk.data.__path__[0]
    >>> in_path = os.path.join(data_dir, 'input.txt')
    >>> out_path = os.path.join(data_dir, 'output_text_analyse.txt')
    >>> analyse(in_path, out_path)  # doctest: +NORMALIZE_WHITESPACE
    and/conj\t1
    dangerously/adv\t1
    drives/vb\t1
    he/pron\t1
    quickly/adv\t1
    """
    # Count into a fresh local container so earlier calls cannot leak in.
    counts = Counter()
    with open(input) as source:
        for line in source:
            counts.update(line.lower().split())

    # Publish the fresh counts through the module-level dict in place, so
    # anything already holding a reference to ``words`` sees this result.
    words.clear()
    words.update(counts)

    with open(output, "w") as target:
        for word, count in sorted(counts.items()):
            entry = "%s\t%d" % (word, count)
            target.write(entry + "\n")
            print(entry)

if __name__ == '__main__':
    # When executed as a script, run the doctests embedded in this module
    # and print the result object returned by doctest.testmod().
    test_results = doctest.testmod()
    print(test_results)